library(alakazam)
library(tidyverse)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(seriation)
library(ggpubr)
library(divo)
library(ggsci)
library(poweRlaw)
library(ggbeeswarm)
library(ggthemes)
library(wesanderson)
library(Bolstad2)
metadata <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_376276/metadata_v2.txt')
Rearrangement_all <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/rearrangement_details_kit_376276/RearrangementDetails_06-25-2019_12-33-29_AM.tsv')
Rearrangement_all <- Rearrangement_all %>% dplyr::select(sample_name, amino_acid, templates, productive_frequency) %>% dplyr::rename('sample_id'=sample_name, 'clone_id'=amino_acid, 'seq_count'=templates, 'seq_freq'=productive_frequency)
kit1 <- merge(metadata, Rearrangement_all, by.x = 'Sample', by.y = 'sample_id')
#kit1 <- kit1 %>% dplyr::select(!group_code)
#metadata <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_12511620/metadata.txt')
Rearrangement_all <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/rearrangement_details_kit_12511620/RearrangementDetails_07-20-2020_10-27-21_AM.tsv')
Rearrangement_all <- Rearrangement_all %>% dplyr::select(sample_name, amino_acid, templates, productive_frequency) %>% dplyr::rename('sample_id'=sample_name, 'clone_id'=amino_acid, 'seq_count'=templates, 'seq_freq'=productive_frequency)
kit2 <- merge(metadata, Rearrangement_all, by.x = 'Sample', by.y = 'sample_id')
#kit2 <- kit2 %>% dplyr::select(!group_code)
Rearrangement_all <- bind_rows(kit1, kit2)
head(Rearrangement_all)
## Sample ID COMET_ID COMET_ID_nr group group_code Kit_ID
## 1 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 2 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 3 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 4 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 5 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 6 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## clone_id seq_count seq_freq
## 1 CASSLVVGRFTMNTEAFF 1 3.064758e-05
## 2 CSAEPGEHGNQPQHF 1 3.064758e-05
## 3 CASSPDGISNQPQHF 1 3.064758e-05
## 4 CASRLRLYSNQPQHF 1 3.064758e-05
## 5 CASSKRHHLVANTEAFF 1 3.064758e-05
## 6 CASSHPRDSPGYTF 1 3.064758e-05
Make plot for T cell fraction
a <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/metadata/SampleOverview_Kit_376276.tsv')
a <- a[, c('sample_name', 'ng/uL')]
b <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/metadata/Overview_samples_kit_2_v2.tsv')
b <- b[, c('Sample_ID', 'ng_ul_DNA')]
colnames(b) <- c('sample_name', 'ng/uL')
c <- bind_rows(a, b)
c['uL'] = 32
c['ng'] = c['ng/uL'] * c['uL']
c['ng_pr_cell'] = 0.0066
c['total_genomes'] = c['ng'] / c['ng_pr_cell']
SampleOverview = read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/SampleOverview_10-11-2021_1-47-39_PM.tsv')
SampleOverview = SampleOverview %>% left_join(c, how='right', on='sample_name')
SampleOverview['tcell_fraction'] = SampleOverview['productive_templates'] / SampleOverview['total_genomes']
meta <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/metadata/metadata_all_v2.txt')
SampleOverview <- meta %>% left_join(SampleOverview, how='right', by=c('subject_id' = 'sample_name'))
SampleOverview <- SampleOverview %>% mutate('NACT-group' = case_when(group=='no_chemo'~'No-NACT',
group=='short_chemo'~'Short-interval',
group=='long_chemo'~'Long-interval'))
SampleOverview$`NACT-group` <- factor(SampleOverview$`NACT-group`, levels=c('No-NACT', 'Short-interval', 'Long-interval'))
same_patient <- c(
'LivMet_38', 'LivMet_11', 'LivMet_14', 'LivMet_17',
'LivMet_40', 'LivMet_42', 'LivMet_33'
)
SampleOverview <- SampleOverview %>% filter(!subject_id %in% same_patient)
SampleOverview
## # A tibble: 85 × 28
## filename subject_id ID COMET_ID COMET_ID_nr group group_code Kit_ID chemo
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 LivMet_1… LivMet_1 1 COMET-0… 26 no_c… 0 376276 no
## 2 LivMet_1… LivMet_10 10 COMET-0… 24 no_c… 0 376276 no
## 3 LivMet_1… LivMet_12 12 COMET-0… 27 long… 2 376276 yes
## 4 LivMet_1… LivMet_13 13 COMET-0… 30 shor… 1 376276 yes
## 5 LivMet_1… LivMet_15 15 COMET-0… 31 shor… 1 376276 yes
## 6 LivMet_1… LivMet_16 16 COMET-0… 32 no_c… 0 376276 no
## 7 LivMet_1… LivMet_18 18 COMET-0… 34 no_c… 0 376276 no
## 8 LivMet_1… LivMet_19 19 COMET-0… 38 shor… 1 376276 yes
## 9 LivMet_2… LivMet_2 2 COMET-0… 35 no_c… 0 376276 no
## 10 LivMet_2… LivMet_20 20 COMET-0… 40 no_c… 0 376276 no
## # … with 75 more rows, and 19 more variables: cluster <dbl>,
## # total_templates <dbl>, productive_templates <dbl>,
## # fraction_productive <dbl>, total_rearrangements <dbl>,
## # productive_rearrangements <dbl>, productive_simpson_clonality <dbl>,
## # max_productive_frequency <dbl>, locus <chr>, sample_tags <lgl>, sku <chr>,
## # test_name <chr>, ng/uL <dbl>, uL <dbl>, ng <dbl>, ng_pr_cell <dbl>,
## # total_genomes <dbl>, tcell_fraction <dbl>, NACT-group <fct>
SampleOverview %>% filter(group == 'no_chemo')
## # A tibble: 35 × 28
## filename subject_id ID COMET_ID COMET_ID_nr group group_code Kit_ID chemo
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 LivMet_1… LivMet_1 1 COMET-0… 26 no_c… 0 376276 no
## 2 LivMet_1… LivMet_10 10 COMET-0… 24 no_c… 0 376276 no
## 3 LivMet_1… LivMet_16 16 COMET-0… 32 no_c… 0 376276 no
## 4 LivMet_1… LivMet_18 18 COMET-0… 34 no_c… 0 376276 no
## 5 LivMet_2… LivMet_2 2 COMET-0… 35 no_c… 0 376276 no
## 6 LivMet_2… LivMet_20 20 COMET-0… 40 no_c… 0 376276 no
## 7 LivMet_2… LivMet_22 22 COMET-0… 42 no_c… 0 376276 no
## 8 LivMet_2… LivMet_25 25 COMET-0… 49 no_c… 0 376276 no
## 9 LivMet_2… LivMet_26 26 COMET-0… 51 no_c… 0 376276 no
## 10 LivMet_2… LivMet_27 27 COMET-0… 52 no_c… 0 376276 no
## # … with 25 more rows, and 19 more variables: cluster <dbl>,
## # total_templates <dbl>, productive_templates <dbl>,
## # fraction_productive <dbl>, total_rearrangements <dbl>,
## # productive_rearrangements <dbl>, productive_simpson_clonality <dbl>,
## # max_productive_frequency <dbl>, locus <chr>, sample_tags <lgl>, sku <chr>,
## # test_name <chr>, ng/uL <dbl>, uL <dbl>, ng <dbl>, ng_pr_cell <dbl>,
## # total_genomes <dbl>, tcell_fraction <dbl>, NACT-group <fct>
SampleOverview %>% filter(group == 'short_chemo')
## # A tibble: 35 × 28
## filename subject_id ID COMET_ID COMET_ID_nr group group_code Kit_ID chemo
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 LivMet_1… LivMet_13 13 COMET-0… 30 shor… 1 376276 yes
## 2 LivMet_1… LivMet_15 15 COMET-0… 31 shor… 1 376276 yes
## 3 LivMet_1… LivMet_19 19 COMET-0… 38 shor… 1 376276 yes
## 4 LivMet_2… LivMet_23 23 COMET-0… 43 shor… 1 376276 yes
## 5 LivMet_2… LivMet_24 24 COMET-0… 46 shor… 1 376276 yes
## 6 LivMet_3… LivMet_30 30 COMET-0… 55 shor… 1 376276 yes
## 7 LivMet_3… LivMet_32 32 COMET-0… 62 shor… 1 376276 yes
## 8 LivMet_3… LivMet_37 37 COMET-0… 5 shor… 1 376276 yes
## 9 LivMet_4… LivMet_41 41 COMET-0… 37 shor… 1 376276 yes
## 10 LivMet_4… LivMet_46 46 COMET-0… 83 shor… 1 376276 yes
## # … with 25 more rows, and 19 more variables: cluster <dbl>,
## # total_templates <dbl>, productive_templates <dbl>,
## # fraction_productive <dbl>, total_rearrangements <dbl>,
## # productive_rearrangements <dbl>, productive_simpson_clonality <dbl>,
## # max_productive_frequency <dbl>, locus <chr>, sample_tags <lgl>, sku <chr>,
## # test_name <chr>, ng/uL <dbl>, uL <dbl>, ng <dbl>, ng_pr_cell <dbl>,
## # total_genomes <dbl>, tcell_fraction <dbl>, NACT-group <fct>
SampleOverview %>% filter(group == 'long_chemo')
## # A tibble: 15 × 28
## filename subject_id ID COMET_ID COMET_ID_nr group group_code Kit_ID chemo
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 LivMet_1… LivMet_12 12 COMET-0… 27 long… 2 3.76e5 yes
## 2 LivMet_4… LivMet_43 43 COMET-0… 74 long… 2 3.76e5 yes
## 3 LivMet_4… LivMet_44 44 COMET-0… 75 long… 2 3.76e5 yes
## 4 LivMet_4… LivMet_45 45 COMET-0… 76 long… 2 3.76e5 yes
## 5 LivMet_6… LivMet_62 62 COMET-0… 105 long… 2 1.25e7 yes
## 6 LivMet_6… LivMet_68 68 COMET-0… 185 long… 2 1.25e7 yes
## 7 LivMet_8… LivMet_84 84 COMET-0… 80 long… 2 1.25e7 yes
## 8 LivMet_8… LivMet_85 85 COMET-0… 131 long… 2 1.25e7 yes
## 9 LivMet_8… LivMet_86 86 COMET-0… 138 long… 2 1.25e7 yes
## 10 LivMet_8… LivMet_87 87 COMET-0… 151 long… 2 1.25e7 yes
## 11 LivMet_8… LivMet_88 88 COMET-0… 182 long… 2 1.25e7 yes
## 12 LivMet_9… LivMet_90 90 COMET-0… 206 long… 2 1.25e7 yes
## 13 LivMet_9… LivMet_91 91 COMET-0… 233 long… 2 1.25e7 yes
## 14 LivMet_9… LivMet_92 92 COMET-0… 242 long… 2 1.25e7 yes
## 15 LivMet_9… LivMet_93 93 COMET-0… 267 long… 2 1.25e7 yes
## # … with 19 more variables: cluster <dbl>, total_templates <dbl>,
## # productive_templates <dbl>, fraction_productive <dbl>,
## # total_rearrangements <dbl>, productive_rearrangements <dbl>,
## # productive_simpson_clonality <dbl>, max_productive_frequency <dbl>,
## # locus <chr>, sample_tags <lgl>, sku <chr>, test_name <chr>, ng/uL <dbl>,
## # uL <dbl>, ng <dbl>, ng_pr_cell <dbl>, total_genomes <dbl>,
## # tcell_fraction <dbl>, NACT-group <fct>
SampleOverview %>% filter(group %in% c('short_chemo', 'long_chemo'))
## # A tibble: 50 × 28
## filename subject_id ID COMET_ID COMET_ID_nr group group_code Kit_ID chemo
## <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 LivMet_1… LivMet_12 12 COMET-0… 27 long… 2 376276 yes
## 2 LivMet_1… LivMet_13 13 COMET-0… 30 shor… 1 376276 yes
## 3 LivMet_1… LivMet_15 15 COMET-0… 31 shor… 1 376276 yes
## 4 LivMet_1… LivMet_19 19 COMET-0… 38 shor… 1 376276 yes
## 5 LivMet_2… LivMet_23 23 COMET-0… 43 shor… 1 376276 yes
## 6 LivMet_2… LivMet_24 24 COMET-0… 46 shor… 1 376276 yes
## 7 LivMet_3… LivMet_30 30 COMET-0… 55 shor… 1 376276 yes
## 8 LivMet_3… LivMet_32 32 COMET-0… 62 shor… 1 376276 yes
## 9 LivMet_3… LivMet_37 37 COMET-0… 5 shor… 1 376276 yes
## 10 LivMet_4… LivMet_41 41 COMET-0… 37 shor… 1 376276 yes
## # … with 40 more rows, and 19 more variables: cluster <dbl>,
## # total_templates <dbl>, productive_templates <dbl>,
## # fraction_productive <dbl>, total_rearrangements <dbl>,
## # productive_rearrangements <dbl>, productive_simpson_clonality <dbl>,
## # max_productive_frequency <dbl>, locus <chr>, sample_tags <lgl>, sku <chr>,
## # test_name <chr>, ng/uL <dbl>, uL <dbl>, ng <dbl>, ng_pr_cell <dbl>,
## # total_genomes <dbl>, tcell_fraction <dbl>, NACT-group <fct>
get_CI_half_width <- function(x, prob) {
n <- length(x)
z_t <- qt(1 - (1 - prob) / 2, df = n - 1)
z_t * sd(x) / sqrt(n)
}
lower <- function(x, prob = 0.95) {
mean(x) - get_CI_half_width(x, prob)
}
upper <- function(x, prob = 0.95) {
mean(x) + get_CI_half_width(x, prob)
}
Mean number of productive CDR3Beta templates per sample
templates <- SampleOverview$productive_templates
# mean templates
paste("Mean templates:", round(mean(templates), 0))
## [1] "Mean templates: 83098"
# lower CI
paste("Lower 95% CI: ", round(lower(templates), 0))
## [1] "Lower 95% CI: 66614"
# upper CI
paste("Upper 95% CI: ", round(upper(templates), 0))
## [1] "Upper 95% CI: 99582"
# median templates
paste("Median templates:", round(median(templates), 3))
## [1] "Median templates: 63063"
Mean T cell fraction per sample
fraction <- SampleOverview$tcell_fraction
# mean fraction
paste("Mean fraction:", round(mean(fraction), 3))
## [1] "Mean fraction: 0.081"
# lower CI
paste("Lower 95% CI: ", round(lower(fraction), 3))
## [1] "Lower 95% CI: 0.065"
# upper CI
paste("Upper 95% CI: ", round(upper(fraction), 3))
## [1] "Upper 95% CI: 0.096"
# median fraction
paste("Median fraction:", round(median(fraction), 3))
## [1] "Median fraction: 0.057"
Mean T cell fraction per sample short interval
fraction <- SampleOverview %>% filter(group == 'short_chemo') %>% pull(tcell_fraction)
# mean fraction
paste("Mean fraction:", round(mean(fraction), 3))
## [1] "Mean fraction: 0.106"
# lower CI
paste("Lower 95% CI: ", round(lower(fraction), 3))
## [1] "Lower 95% CI: 0.074"
# upper CI
paste("Upper 95% CI: ", round(upper(fraction), 3))
## [1] "Upper 95% CI: 0.138"
# median fraction
paste("Median fraction:", round(median(fraction), 3))
## [1] "Median fraction: 0.076"
Mean T cell fraction per sample no chemo
fraction <- SampleOverview %>% filter(group == 'no_chemo') %>% pull(tcell_fraction)
# mean fraction
paste("Mean fraction:", round(mean(fraction), 3))
## [1] "Mean fraction: 0.063"
# lower CI
paste("Lower 95% CI: ", round(lower(fraction), 3))
## [1] "Lower 95% CI: 0.048"
# upper CI
paste("Upper 95% CI: ", round(upper(fraction), 3))
## [1] "Upper 95% CI: 0.079"
# median fraction
paste("Median fraction:", round(median(fraction), 3))
## [1] "Median fraction: 0.057"
Mean T cell fraction per sample long chemo
fraction <- SampleOverview %>% filter(group == 'long_chemo') %>% pull(tcell_fraction)
# mean fraction
print(paste("Mean fraction:", round(mean(fraction), 3)))
## [1] "Mean fraction: 0.062"
# lower CI
print(paste("Lower 95% CI: ", round(lower(fraction), 3)))
## [1] "Lower 95% CI: 0.034"
# upper CI
print(paste("Upper 95% CI: ", round(upper(fraction), 3)))
## [1] "Upper 95% CI: 0.089"
# median fraction
paste("Median fraction:", round(median(fraction), 3))
## [1] "Median fraction: 0.048"
theme(axis.text.x = element_text(angle=60, hjust=1)
my_comparisons <- list(c('No-NACT', 'Short-interval'),
c('No-NACT', 'Long-interval'),
c('Short-interval', 'Long-interval'))
p <- ggviolin(SampleOverview, x='NACT-group', y='tcell_fraction',
color='NACT-group', palette='jco',
add='dotplot', scale='count')
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test',label.y=c(.45,.75, .6)) +
ylab("T cell fraction") + theme_minimal() +
theme(legend.position='none', axis.text.x = element_text(angle=60, hjust=1))
ggsave(filename = '/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/tcr_seq_t_fraction.pdf',
plot = p, width=4, height=5)
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
p
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
Total unique clones
comb_rear_all <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/CombinedRearrangements_ALL_COMET.tsv')
print(paste('Total unique clones:', comb_rear_all %>% pull(`Amino Acid`) %>% length()))
## [1] "Total unique clones: 1413435"
Median (min-max) unique clones
print(paste("Median unique clones:",SampleOverview$productive_rearrangements %>% median()))
## [1] "Median unique clones: 15596"
print(paste("Min unique clones:",SampleOverview$productive_rearrangements %>% min()))
## [1] "Min unique clones: 1476"
print(paste("Max unique clones:",SampleOverview$productive_rearrangements %>% max()))
## [1] "Max unique clones: 66976"
metadata <- bind_rows(read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_376276/metadata_v2.txt') %>% dplyr::select(!group_code),
read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_12511620/metadata_v2.txt') %>% dplyr::select(!group_code))
kit_1_qc <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/qc_reports/qcReport_kit_1.tsv')
kit_2_qc <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/qc_reports/qcReport_kit_2.tsv')
qc_report <- bind_rows(kit_1_qc, kit_2_qc)
## Warning: One or more parsing issues, see `problems()` for details
## Warning: One or more parsing issues, see `problems()` for details
coverage <- qc_report %>% select(`Sample Name`, Coverage)
colnames(coverage) <- c("sample_id", "Coverage")
coverage
## # A tibble: 95 × 2
## sample_id Coverage
## <chr> <dbl>
## 1 LivMet_1 26
## 2 LivMet_10 10.8
## 3 LivMet_11 28.7
## 4 LivMet_12 5.1
## 5 LivMet_13 3.8
## 6 LivMet_14 4.7
## 7 LivMet_15 15.8
## 8 LivMet_16 30.6
## 9 LivMet_17 19.6
## 10 LivMet_18 37.4
## # … with 85 more rows
# Set path to hill diversity profiles
path <- '/Users/eirikhoy/dropbox/projects/airr_tools/results/comet/hill_div/'
list_of_files <- list.files(path = path,
recursive = TRUE,
pattern = "\\.tsv$",
full.names = TRUE)
df <- readr::read_tsv(list_of_files)
df <- df %>% left_join(metadata[, c("Sample", "COMET_ID")], by=c("sample_id"="Sample")) %>%
left_join(coverage)
### new analysis, exclude coverage < 5 and patient duplicates
duplicate_samples <- c('LivMet_38', 'LivMet_11', 'LivMet_14', 'LivMet_17',
'LivMet_40', 'LivMet_42', 'LivMet_33')
df <- df %>% filter(!sample_id %in% duplicate_samples) %>%
filter(COMET_ID != 'COMET-0041M1') %>% # remove because NACT-interval data missing
filter(Coverage >= 5)
nr_samples <- df$sample_id %>% unique() %>% length()
print(paste("Number of samples after selecting for coverage >= 5:", nr_samples))
## [1] "Number of samples after selecting for coverage >= 5: 77"
annotation <- metadata %>%
filter(group != 'na') %>%
dplyr::select(COMET_ID, group) %>%
as.data.frame()
rownames(annotation) <- annotation$COMET_ID
annotation$COMET_ID <- NULL
diversity_profile <- df %>%
dplyr::select(COMET_ID, q, e) %>%
pivot_wider(names_from = COMET_ID, values_from = e) %>%
as.data.frame()
rownames(diversity_profile) <- diversity_profile$q
diversity_profile$q <- NULL
diversity_profile <- as.matrix(diversity_profile)
nr_categories <- length(unique(annotation$group))
ann_colors <- list(group = c(no_chemo='#0073C2FF', short_chemo="#EFC000FF", long_chemo="#3B3B3BFF"))
names(ann_colors$group) <- unique(unique(annotation$group))
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
cl_cb <- function(hcl, mat){
# Recalculate manhattan distances for reorder method
dists <- dist(mat, method = "manhattan")
# Perform reordering according to OLO method
hclust_olo <- reorder(hcl, dists)
return(hclust_olo)
}
diversity_profile_breaks <- quantile_breaks(diversity_profile, n = 10)
pheatmap(diversity_profile,
annotation = annotation,
cluster_rows = TRUE,
annotation_colors = ann_colors,
border_color = NA,
main = paste('Evenness Profile\n', "n =", nr_samples),
fontsize = 7.5,
color=inferno(length(diversity_profile_breaks) - 1),
breaks = diversity_profile_breaks,
clustering_callback = cl_cb
)
diversity_profile_breaks <- quantile_breaks(cor(diversity_profile), n = 10)
pheatmap(cor(diversity_profile),
annotation_col = annotation,
annotation_row = annotation,
annotation_colors = ann_colors,
clustering_distance_cols = "manhattan",
clustering_distance_rows = "manhattan",
border_color = NA,
main = paste('Correlation Map Evenness Profile\n', 'n=', nr_samples),
fontsize = 7.5,
color = inferno(10),
#show_rownames = FALSE,
#show_colnames = FALSE,
#color = inferno(length(diversity_profile_breaks) - 1),
#breaks = diversity_profile_breaks
)
# reorder clusters according to OLO method
out <- pheatmap(cor(diversity_profile),
annotation_col = annotation,
annotation_row = annotation,
#cluster_rows = TRUE,
annotation_colors = ann_colors,
clustering_distance_cols = "manhattan",
clustering_distance_rows = "manhattan",
main = paste('Correlation Map Evenness Profile\n', 'n=', nr_samples),
fontsize = 7.5,
#color = inferno(length(diversity_profile_breaks) - 1),
#breaks = diversity_profile_breaks
clustering_callback = cl_cb,
# legend=FALSE,
# annotation_legend=FALSE,
#show_rownames = FALSE,
#show_colnames = FALSE,
border_color = NA,
color = inferno(10)
#cutree_rows = 6,
#cutree_cols = 6
)
height = 2.5
plot(out$tree_col)
abline(h=height, col='red', lty=2, lwd=2)
#Cut the row (gene) dendrogram at a Euclidean distance dis-similarity of 8
d = data.frame(sort(cutree(out$tree_col, h=height)))
d = tibble(
'COMET_ID' = rownames(d),
'cluster' = as.character(d[,1])
)
t <- d %>% merge(metadata, by.x='COMET_ID', by.y='COMET_ID') %>% dplyr::select(group, cluster)# %>% filter(cluster != 6)
write_tsv(d %>% left_join(metadata %>% dplyr::select(c(Sample, COMET_ID)), by='COMET_ID'), file='/Users/eirikhoy/dropbox/Manuscript_ImmunoSEQ_COMET/supplementary_files/results/hill_even_clust_dist_3.tsv')
#t$group[t$group == 'long_chemo'] <- 'yes'
#t$group[t$group == 'short_chemo'] <- 'yes'
#t$group[t$group == 'no_chemo'] <- 'no'
t <- table(t)
print(t)
## cluster
## group 1 2 3 4 5 6
## long_chemo 6 6 0 1 2 0
## no_chemo 10 7 6 8 0 1
## short_chemo 11 11 1 1 5 1
#chisq <- chisq.test(t, simulate.p.value = FALSE, B=10000)
#print(chisq)
fish <- fisher.test(t)
print(fish)
##
## Fisher's Exact Test for Count Data
##
## data: t
## p-value = 0.02132
## alternative hypothesis: two.sided
get_CI_half_width <- function(x, prob) {
n <- length(x)
z_t <- qt(1 - (1 - prob) / 2, df = n - 1)
z_t * sd(x) / sqrt(n)
}
lower <- function(x, prob = 0.95) {
mean(x) - get_CI_half_width(x, prob)
}
upper <- function(x, prob = 0.95) {
mean(x) + get_CI_half_width(x, prob)
}
no_chemo_ <- df %>%
merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>%
as_tibble() %>% filter(group == 'no_chemo') %>%
group_by(q) %>% summarize_at(vars(c('d','e')), funs(mean, sd, min, max, lower, upper)) %>%
mutate(group='no_chemo')
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
short_chemo_ <- df %>%
merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>%
as_tibble() %>% filter(group == 'short_chemo') %>%
group_by(q) %>% summarize_at(vars(c('d','e')), funs(mean, sd, min, max, lower, upper)) %>%
mutate(group='short_chemo')
long_chemo_ <- df %>%
merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>%
as_tibble() %>% filter(group == 'long_chemo') %>%
group_by(q) %>% summarize_at(vars(c('d','e')), funs(mean, sd, min, max, lower, upper)) %>%
mutate(group='long_chemo')
df_hill <- bind_rows(bind_rows(no_chemo_, short_chemo_), long_chemo_) %>%
mutate(group = factor(group, levels=c("no_chemo", "short_chemo", "long_chemo")))
p1 <- df_hill %>%
mutate(`NACT-group` = case_when(
group == 'no_chemo' ~ 'No-NACT',
group == 'short_chemo' ~ 'Short-interval',
group == 'long_chemo' ~ 'Long-interval'
)) %>%
mutate(`NACT-group` = factor(`NACT-group`, levels = c('No-NACT', 'Short-interval', 'Long-interval'))) %>%
ggplot(aes(x=q, y=d_mean, group=`NACT-group`, color=`NACT-group`)) +
geom_line() +
geom_ribbon(aes(ymin = d_lower, ymax = d_upper, fill=`NACT-group`), alpha = 0.1, color=NA) +
theme_classic() +
xlab(expression(alpha)) + ylab('Hill diversity') +
theme(axis.title.x = element_text(face='italic')) +
scale_x_continuous(breaks=c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) + theme(legend.title = element_blank())
p1 <- set_palette(p1, 'jco')
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/hill_div_prof.pdf',
plot=p1,
width=6, height=4)
p1
Species richness all NACT groups
species_richness_all <- df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(q == 0.0) %>% pull(d)
print(paste("N =", length(species_richness_all)))
## [1] "N = 77"
print(paste("Mean species richness:", round(mean(species_richness_all),0)))
## [1] "Mean species richness: 16000"
print(paste("Lower CI species richness:", round(lower(species_richness_all),0)))
## [1] "Lower CI species richness: 13611"
print(paste("Upper CI species richness:", round(upper(species_richness_all),0)))
## [1] "Upper CI species richness: 18388"
Species richness short-interval NACT group
species_richness_short_chemo <- df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(group == 'short_chemo') %>% filter(q == 0.0) %>% pull(d)
print(paste("N =", length(species_richness_short_chemo)))
## [1] "N = 30"
print(paste("Mean species richness:", round(mean(species_richness_short_chemo),0)))
## [1] "Mean species richness: 18020"
print(paste("Lower CI species richness:", round(lower(species_richness_short_chemo),0)))
## [1] "Lower CI species richness: 13524"
print(paste("Upper CI species richness:", round(upper(species_richness_short_chemo),0)))
## [1] "Upper CI species richness: 22515"
Species richness long-interval NACT group
species_richness_long_chemo <- df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(group == 'long_chemo') %>% filter(q == 0.0) %>% pull(d)
print(paste("N =", length(species_richness_long_chemo)))
## [1] "N = 15"
print(paste("Mean species richness:", round(mean(species_richness_long_chemo),0)))
## [1] "Mean species richness: 16079"
print(paste("Lower CI species richness:", round(lower(species_richness_long_chemo),0)))
## [1] "Lower CI species richness: 10394"
print(paste("Upper CI species richness:", round(upper(species_richness_long_chemo),0)))
## [1] "Upper CI species richness: 21764"
Species richness No-NACT group
species_richness_no_chemo <- df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(group == 'no_chemo') %>% filter(q == 0.0) %>% pull(d)
print(paste("N =", length(species_richness_no_chemo)))
## [1] "N = 32"
print(paste("Mean species richness:", round(mean(species_richness_no_chemo),0)))
## [1] "Mean species richness: 14069"
print(paste("Lower CI species richness:", round(lower(species_richness_no_chemo),0)))
## [1] "Lower CI species richness: 10833"
print(paste("Upper CI species richness:", round(upper(species_richness_no_chemo),0)))
## [1] "Upper CI species richness: 17304"
p2 <- df_hill %>%
ggplot(aes(x=q, y=e_mean, group=group, color=group)) +
geom_line() +
geom_ribbon(aes(ymin = e_lower, ymax = e_upper, fill=group), alpha = 0.1, color=NA) +
theme_classic() +
xlab(expression(alpha)) + ylab('Hill evenness') +
theme(axis.title.x = element_text(face='italic')) +
scale_x_continuous(breaks=c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
p2 <- set_palette(p2, 'jco')
p2
leg <- get_legend(p1)
p3 <- ggarrange(p1, p2, ncol=2, nrow=1,
legend.grob = leg,
legend='right')
ggsave(filename = '/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/hill_div_even.pdf',
plot = p3, width = 9, height=3)
samples <- unique(df$COMET_ID)
hill_auc <- matrix(ncol=1, nrow=0)
#rownames(even_auc) <- samples
for (i in samples){
x <- df %>% filter(COMET_ID == i) %>% pull(q)
y <- df %>% filter(COMET_ID == i) %>% pull(e)
auc <- sintegral(x, y)$int
hill_auc[i] <- auc
}
hill_auc <- data.frame(hill_auc)
hill_auc['COMET_ID'] <- rownames(hill_auc)
hill_auc <- as_tibble(hill_auc)
hill_auc <- hill_auc %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% as_tibble()
hill_auc <- hill_auc %>% filter(group != 'na')
hill_auc <- hill_auc %>% filter(!Sample %in% duplicate_samples)
hill_auc$group <- factor(hill_auc$group, levels=c('no_chemo', 'short_chemo', 'long_chemo'))
hill_auc <- left_join(x=hill_auc, y=qc_report, by=c("Sample" = "Sample Name"))
Species richness dependence on coverage
# lm species richness versus coverage
lm.fit <- df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(q == 0.0) %>%
lm(.$d ~ .$Coverage, data=.)
summary(lm.fit)
##
## Call:
## lm(formula = .$d ~ .$Coverage, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11804 -5343 -1420 4514 27243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27636.35 1745.94 15.829 < 2e-16 ***
## .$Coverage -634.62 81.64 -7.773 3.22e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7884 on 75 degrees of freedom
## Multiple R-squared: 0.4462, Adjusted R-squared: 0.4388
## F-statistic: 60.42 on 1 and 75 DF, p-value: 3.22e-11
confint(lm.fit, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 24158.2641 31114.4323
## .$Coverage -797.2652 -471.9846
# plot species richness versus coverage
df %>% merge(metadata, by.x = 'COMET_ID', by.y = 'COMET_ID') %>% filter(q == 0.0) %>%
ggplot(aes(x=Coverage, y=d)) +
geom_point() +
geom_smooth(method='lm', color='red') +
theme_minimal() +
ylab("Species Richness") + xlab("Coverage")
## `geom_smooth()` using formula 'y ~ x'
Species richness is significantly associated with coverage
AUC evenness profile dependence on coverage
# lm hill evenness auc versus coverage
lm.fit <- lm(hill_auc ~ Coverage, data=hill_auc)
summary(lm.fit)
##
## Call:
## lm(formula = hill_auc ~ Coverage, data = hill_auc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3526 -0.7069 -0.1350 0.5973 2.5010
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.25846 0.24952 17.067 <2e-16 ***
## Coverage 0.01477 0.01167 1.266 0.209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.127 on 75 degrees of freedom
## Multiple R-squared: 0.02092, Adjusted R-squared: 0.007868
## F-statistic: 1.603 on 1 and 75 DF, p-value: 0.2094
confint(lm.fit, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 3.761397043 4.75551660
## Coverage -0.008472166 0.03801431
# plot hill evenness auc versus coverage
p <- hill_auc %>%
ggplot(aes(x=Coverage, y=hill_auc)) +
geom_point() + geom_vline(xintercept = 5, linetype = 'dotted', color='red', size=1.5) +
geom_smooth(method='lm', color='red')
hill_auc <- hill_auc %>% filter(Coverage >= 5)
hill_auc <- hill_auc %>% mutate("NACT-group" = factor(case_when(group == 'no_chemo' ~ 'No-NACT',
group == 'short_chemo' ~ 'Short-interval',
group == 'long_chemo' ~ 'Long-interval'),
levels=c('No-NACT', 'Short-interval', 'Long-interval')))
hill_auc['AUC_group'] <- cut(hill_auc$hill_auc, 3, labels=FALSE)
hill_auc <- hill_auc %>% mutate(Clonality = 10 - hill_auc)
p
## `geom_smooth()` using formula 'y ~ x'
There was no significant association between area under the curve of
hill evenness profiles and coverage
library(ggpubr)
my_comparisons <- list(c('No-NACT', 'Short-interval'),
c('No-NACT', 'Long-interval'),
c('Short-interval', 'Long-interval'))
p <- ggboxplot(hill_auc, x='NACT-group', y='hill_auc',
color='NACT-group', palette='jco',
add='jitter')
p + stat_compare_means(comparisons = my_comparisons, method='t.test') +
ylab("AUC evenness profile") + xlab("") + theme_minimal() +
theme(axis.text.x = element_text(angle=60, hjust=1),
legend.position='none')
library(ggpubr)
my_comparisons <- list(c('No-NACT', 'Short-interval'),
c('No-NACT', 'Long-interval'),
c('Short-interval', 'Long-interval'))
p <- ggboxplot(hill_auc, x='NACT-group', y='Clonality',
color='NACT-group', palette='jco',
add='jitter')
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test',
aes(label = paste0("p = ", ..p.format..))) +
ylab("Clonality") + xlab("") + theme_minimal() +
theme(axis.text.x = element_text(angle=60, hjust=1),
legend.position='none')
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/Clonality_AUC.pdf',
plot=p,
width=3, height=6)
p
compare_means(Clonality ~ group, data=hill_auc,
method='t.test',
p.adjust.method = 'fdr')
## # A tibble: 3 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Clonality no_chemo short_chemo 0.00411 0.012 0.0041 ** T-test
## 2 Clonality no_chemo long_chemo 0.0280 0.042 0.0280 * T-test
## 3 Clonality short_chemo long_chemo 0.709 0.71 0.7092 ns T-test
hill_auc
## # A tibble: 77 × 18
## COMET_ID hill_auc Sample ID COMET_ID_nr group Kit_ID `Kit UID` `Run ID`
## <chr> <dbl> <chr> <dbl> <dbl> <fct> <dbl> <dbl> <chr>
## 1 COMET-000… 5.72 LivMe… 35 3 no_ch… 376276 376276 376276_1
## 2 COMET-000… 6.38 LivMe… 36 4 no_ch… 376276 376276 376276_1
## 3 COMET-000… 3.70 LivMe… 37 5 short… 376276 376276 376276_1
## 4 COMET-001… 3.55 LivMe… 6 10 no_ch… 376276 376276 376276_1
## 5 COMET-001… 5.57 LivMe… 5 11 no_ch… 376276 376276 376276_1
## 6 COMET-001… 4.15 LivMe… 7 15 no_ch… 376276 376276 376276_1
## 7 COMET-001… 4.17 LivMe… 8 17 no_ch… 376276 376276 376276_1
## 8 COMET-001… 3.93 LivMe… 9 18 short… 376276 376276 376276_1
## 9 COMET-002… 5.27 LivMe… 39 23 no_ch… 376276 376276 376276_1
## 10 COMET-002… 3.98 LivMe… 10 24 no_ch… 376276 376276 376276_1
## # … with 67 more rows, and 9 more variables: Well Location <chr>,
## # Date Posted <chr>, Gene Rearrangements <dbl>, Coverage <dbl>,
## # % On-target <dbl>, QC Status <chr>, NACT-group <fct>, AUC_group <int>,
## # Clonality <dbl>
library(ggpubr)
sidedness <- readxl::read_xlsx('/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/tables/metadata.xlsx') %>% select(Sample, pCRC_Location)
hill_auc_with_sidedness <- hill_auc %>% left_join(sidedness) %>%
filter(!pCRC_Location %in% c("na")) %>%
mutate(pCRC_Location = case_when(
pCRC_Location == "right_colon" ~ "Right Side",
pCRC_Location == "left_colon" ~ "Left Side",
pCRC_Location == "rectum" ~ "Rectum",
pCRC_Location == "transverse_colon" ~ "Left Side"
)) %>%
mutate(pCRC_Location = factor(pCRC_Location, levels = c("Right Side", "Left Side", "Rectum")))
## Joining, by = "Sample"
my_comparisons <- list(c('Left Side', 'Right Side'),
c('Left Side', 'Rectum'),
c('Right Side', 'Rectum'))
p <- ggboxplot(hill_auc_with_sidedness, x='pCRC_Location', y='Clonality',
color='pCRC_Location', #palette='jco',
add='jitter')
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test',
aes(label = paste0("p = ", ..p.format..))) +
ylab("Clonality") + xlab("") + theme_minimal() +
theme(axis.text.x = element_text(angle=60, hjust=1),
legend.position='none')
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/Clonality_AUC_pcrc_sidedness.pdf',
plot=p,
width=3, height=6)
p
sidedness$pCRC_Location
## [1] "right_colon" "rectum" "left_colon" "right_colon"
## [5] "left_colon" "left_colon" "rectum" "rectum"
## [9] "rectum" "rectum" "right_colon" "rectum"
## [13] "rectum" "left_colon" "left_colon" "left_colon"
## [17] "left_colon" "left_colon" "left_colon" "rectum"
## [21] "right_colon" "right_colon" "rectum" "right_colon"
## [25] "left_colon" "transverse_colon" "left_colon" "left_colon"
## [29] "left_colon" "right_colon" "rectum" "rectum"
## [33] "right_colon" "rectum" "left_colon" "left_colon"
## [37] "rectum" "rectum" "rectum" "right_colon"
## [41] "left_colon" "right_colon" "right_colon" "left_colon"
## [45] "right_colon" "left_colon" "rectum" "rectum"
## [49] "rectum" "left_colon" "rectum" "right_colon"
## [53] "rectum" "rectum" "rectum" "rectum"
## [57] "right_colon" "left_colon" "left_colon" "rectum"
## [61] "rectum" "rectum" "rectum" "rectum"
## [65] "rectum" "rectum" "rectum" "rectum"
## [69] "left_colon" "left_colon" "left_colon" "rectum"
## [73] "rectum" "left_colon" "left_colon" "na"
## [77] "right_colon" "na" "left_colon" "right_colon"
## [81] "left_colon" "rectum" "left_colon" "left_colon"
## [85] "rectum" "right_colon" "left_colon" "rectum"
## [89] "left_colon" "left_colon" "left_colon" "na"
clonality_all <- hill_auc %>% pull(Clonality)
print(paste("N =", length(clonality_all)))
## [1] "N = 77"
print(paste("Mean Clonality:", round(mean(clonality_all),1)))
## [1] "Mean Clonality: 5.5"
print(paste("Lower CI Clonality:", round(lower(clonality_all),1)))
## [1] "Lower CI Clonality: 5.2"
print(paste("Upper CI Clonality:", round(upper(clonality_all),1)))
## [1] "Upper CI Clonality: 5.7"
clonality_short_chemo <- hill_auc %>% filter(group == 'short_chemo') %>% pull(Clonality)
print(paste("N =", length(clonality_short_chemo)))
## [1] "N = 30"
print(paste("Mean Clonality:", round(mean(clonality_short_chemo),1)))
## [1] "Mean Clonality: 5.8"
print(paste("Lower CI Clonality:", round(lower(clonality_short_chemo),1)))
## [1] "Lower CI Clonality: 5.5"
print(paste("Upper CI Clonality:", round(upper(clonality_short_chemo),1)))
## [1] "Upper CI Clonality: 6.2"
clonality_long_chemo <- hill_auc %>% filter(group == 'long_chemo') %>% pull(Clonality)
print(paste("N =", length(clonality_long_chemo)))
## [1] "N = 15"
print(paste("Mean Clonality:", round(mean(clonality_long_chemo),1)))
## [1] "Mean Clonality: 5.7"
print(paste("Lower CI Clonality:", round(lower(clonality_long_chemo),1)))
## [1] "Lower CI Clonality: 5.2"
print(paste("Upper CI Clonality:", round(upper(clonality_long_chemo),1)))
## [1] "Upper CI Clonality: 6.2"
clonality_no_chemo <- hill_auc %>% filter(group == 'no_chemo') %>% pull(Clonality)
print(paste("N =", length(clonality_no_chemo)))
## [1] "N = 32"
print(paste("Mean Clonality:", round(mean(clonality_no_chemo),1)))
## [1] "Mean Clonality: 5"
print(paste("Lower CI Clonality:", round(lower(clonality_no_chemo),1)))
## [1] "Lower CI Clonality: 4.6"
print(paste("Upper CI Clonality:", round(upper(clonality_no_chemo),1)))
## [1] "Upper CI Clonality: 5.4"
lm.fit <- left_join(hill_auc, Rearrangement_all %>% group_by(COMET_ID) %>%
mutate(nr_tcells = sum(seq_count)) %>%
dplyr::select(COMET_ID, nr_tcells) %>% distinct()) %>%
lm(Clonality ~ nr_tcells, data=.)
## Joining, by = "COMET_ID"
summary(lm.fit)
##
## Call:
## lm(formula = Clonality ~ nr_tcells, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.59439 -0.67233 -0.05764 0.79094 1.94934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.604e+00 2.574e-01 17.891 < 2e-16 ***
## nr_tcells 8.433e-06 2.189e-06 3.852 0.000494 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.029 on 34 degrees of freedom
## (41 observations deleted due to missingness)
## Multiple R-squared: 0.3038, Adjusted R-squared: 0.2834
## F-statistic: 14.84 on 1 and 34 DF, p-value: 0.0004938
confint(lm.fit, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 4.081417e+00 5.127430e+00
## nr_tcells 3.983955e-06 1.288189e-05
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
p <- left_join(hill_auc, Rearrangement_all %>% group_by(COMET_ID) %>%
mutate(nr_tcells = sum(seq_count)) %>%
dplyr::select(COMET_ID, nr_tcells) %>% distinct()) %>%
ggplot(aes(x=nr_tcells, y=Clonality)) +
geom_point() +
geom_smooth(method='lm', color='red') +
theme_minimal() +
ylab("Clonality") + xlab("Number of T cells") +
scale_x_continuous(labels = comma)
## Joining, by = "COMET_ID"
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/lm_clonality_nr_t_cells.pdf',
plot=p,
width=5, height=5)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing non-finite values (stat_smooth).
## Warning: Removed 41 rows containing missing values (geom_point).
p
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 41 rows containing non-finite values (stat_smooth).
## Warning: Removed 41 rows containing missing values (geom_point).
meta1 <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_376276/metadata.txt') %>% dplyr::select(-group_code)
## Rows: 46 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Sample, COMET_ID, group, group_code
## dbl (3): ID, COMET_ID_nr, Kit_ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
meta2 <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data_12511620/metadata.txt') %>% dplyr::select(-group_code)
## Rows: 47 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Sample, COMET_ID, group
## dbl (4): ID, COMET_ID_nr, group_code, Kit_ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- bind_rows(meta1, meta2)
no_chemo_names <- metadata %>% filter(group == 'no_chemo') %>% pull(Sample)
short_chemo_names <- metadata %>% filter(group == 'short_chemo') %>% pull(Sample)
long_chemo_names <- metadata %>% filter(group == 'long_chemo') %>% pull(Sample)
comb_rear_all <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/CombinedRearrangements_ALL_COMET.tsv')
## Rows: 1413435 Columns: 98
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Amino Acid
## dbl (97): Sum (Templates), Present In, LivMet_1, LivMet_10, LivMet_11, LivMe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
columns <- colnames(comb_rear_all[, 4:ncol(comb_rear_all)])
columns <- columns[1:(length(columns)-2)]
df_ <- data.frame(
'Sample' = columns
)
columns <- df_ %>% left_join(metadata) %>% pull(COMET_ID)
## Joining, by = "Sample"
colnames(comb_rear_all) <- c("Amino Acid", "Sum (Templates)", "Present In", columns, "Negative", "Positive")
comb_rear_all
## # A tibble: 1,413,435 × 98
## `Amino Acid` `Sum (Templates)` `Present In` `COMET-0026M1` `COMET-0024M1`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CASSLRTGATLHAFF 34204 2 0 0
## 2 CASSQEPDIKPNQPQHF 28532 1 0 0
## 3 CASSIMAAGGDGYTF 14526 1 0 0
## 4 CASSEFRVGYEQYF 12134 1 0 0
## 5 CASSNRGGNEQFF 11557 3 0 0
## 6 CASSQNRDGEQFF 11278 2 0 0
## 7 CASSPPDRNTEAFF 10984 2 0 0
## 8 CATKDSKNTEAFF 9903 2 0 0
## 9 CASRGSMNTEAFF 8848 8 0 0
## 10 CASSETGQGHGYTF 8508 1 0 0
## # … with 1,413,425 more rows, and 93 more variables: COMET-0026M2 <dbl>,
## # COMET-0027M3 <dbl>, COMET-0030M1 <dbl>, COMET-0030M2 <dbl>,
## # COMET-0031M1 <dbl>, COMET-0032M1 <dbl>, COMET-0032M2 <dbl>,
## # COMET-0034M1 <dbl>, COMET-0038M1 <dbl>, COMET-0035M1_1 <dbl>,
## # COMET-0040M1 <dbl>, COMET-0041M1 <dbl>, COMET-0042M1 <dbl>,
## # COMET-0043M1 <dbl>, COMET-0046M1 <dbl>, COMET-0049M1 <dbl>,
## # COMET-0051M1 <dbl>, COMET-0052M1 <dbl>, COMET-0053M1 <dbl>, …
public_private <- comb_rear_all %>%
mutate(public_private = case_when(`Present In` == 1 ~ "Private",
`Present In` > 1 ~ "Public")) %>%
dplyr::select(`Amino Acid`, `Sum (Templates)`, `Present In`, public_private)
public_private
## # A tibble: 1,413,435 × 4
## `Amino Acid` `Sum (Templates)` `Present In` public_private
## <chr> <dbl> <dbl> <chr>
## 1 CASSLRTGATLHAFF 34204 2 Public
## 2 CASSQEPDIKPNQPQHF 28532 1 Private
## 3 CASSIMAAGGDGYTF 14526 1 Private
## 4 CASSEFRVGYEQYF 12134 1 Private
## 5 CASSNRGGNEQFF 11557 3 Public
## 6 CASSQNRDGEQFF 11278 2 Public
## 7 CASSPPDRNTEAFF 10984 2 Public
## 8 CATKDSKNTEAFF 9903 2 Public
## 9 CASRGSMNTEAFF 8848 8 Public
## 10 CASSETGQGHGYTF 8508 1 Private
## # … with 1,413,425 more rows
Fraction public versus private clones
nr_private <- public_private %>% filter(public_private == 'Private') %>% pull(public_private) %>% length()
nr_public <- public_private %>% filter(public_private == 'Public') %>% pull(public_private) %>% length()
total_clones <- nr_private + nr_public
fraction_private <- nr_private / total_clones
fraction_public <- nr_public / total_clones
print(paste("Public Clones:", nr_public))
## [1] "Public Clones: 137245"
print(paste("Fraction Public:", fraction_public))
## [1] "Fraction Public: 0.0971003265095317"
print(paste("Private Clones:", nr_private))
## [1] "Private Clones: 1276190"
print(paste("Fraction Private:", fraction_private))
## [1] "Fraction Private: 0.902899673490468"
df_public_private <- data.frame(
"type" = c("Private", "Public"),
"Percentage" = c(fraction_private*100, fraction_public*100)
#"Private" = fraction_private,
#"Public" = fraction_public
)
df_public_private$type <- factor(df_public_private$type, levels=c("Public", "Private"))
p <- df_public_private %>%
ggplot(aes(x=type, y=Percentage, fill=type)) +
scale_fill_jama() +#values=c("#E7B800", "#FC4E07")) +
geom_col(color='black') +
theme_classic() + xlab("") +
scale_y_continuous(name="% of Total Clonltypes", limits=c(0, 100), breaks=seq(0, 100, 25)) +
theme(legend.position="none")
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/public_vs_private_percentage.pdf',
plot=p,
width=1.5, height=4)
p
x <- as.data.frame(comb_rear_all %>% dplyr::select(!c("Positive", "Negative")))
rownames(x) <- x$`Amino Acid`
x <- x[, 4:(ncol(x))]
result <- mh(x, PlugIn = TRUE, graph=TRUE)
### Comparison of similarity of samples from the same patient.
same_patient <- c("LivMet_16", "LivMet_17", "LivMet_32", "LivMet_33", "LivMet_3", "LivMet_42",
"LivMet_2", "LivMet_40", "LivMet_38", "LivMet_5", "LivMet_13", "LivMet_14")
same_patient <- c("COMET-0026M1", "COMET-0026M2",
"COMET-0032M1", "COMET-0032M2",
"COMET-0062M1", "COMET-0062M2",
"COMET-0059M1_1", "COMET-0059M1_2",
"COMET-0035M1_1", "COMET-0035M1_2",
"COMET-0011M1_1", "COMET-0011M1_2",
"COMET-0030M1","COMET-0030M2")
mh_same_patient <- result$PlugIn[same_patient, same_patient]
pheatmap(mh_same_patient, cluster_rows = FALSE, cluster_cols = FALSE)
same_patient_cols <- c("COMET-0026M1", "COMET-0032M1", "COMET-0062M1", "COMET-0059M1_1", "COMET-0035M1_1", "COMET-0011M1_1", "COMET-0030M1")
same_patient_rows <- c("COMET-0026M2", "COMET-0032M2", "COMET-0062M2", "COMET-0059M1_2", "COMET-0035M1_2", "COMET-0011M1_2", "COMET-0030M2")
same_patient_mh <- c(
mh_same_patient["COMET-0026M2", "COMET-0026M1"],
mh_same_patient["COMET-0032M2", "COMET-0032M1"],
mh_same_patient["COMET-0062M2", "COMET-0062M1"],
mh_same_patient["COMET-0030M2", "COMET-0030M1"],
mh_same_patient["COMET-0059M1_2","COMET-0059M1_1"],
mh_same_patient["COMET-0035M1_2","COMET-0035M1_1"],
mh_same_patient["COMET-0011M1_1","COMET-0011M1_2"])
mh_same_vs_diff_df <- data.frame(
'mh_val' = same_patient_mh
)
mh_same_vs_diff_df['type'] <- c('Same Patient Different Metastasis',
'Same Patient Different Metastasis',
'Same Patient Different Metastasis',
'Same Patient Different Metastasis',
'Same Patient Same Metastasis',
'Same Patient Same Metastasis',
'Same Patient Same Metastasis'
)
non_same <- metadata %>% filter(!COMET_ID %in% same_patient) %>% pull(COMET_ID)
mh_non_same <- result$PlugIn[non_same, non_same]
df_ <- data.frame(
'mh_val' = mh_non_same[lower.tri(mh_non_same)] )
df_['type'] <- 'Different Patient'
df_ <- as_tibble(df_)
mh_same_vs_diff_df <- as_tibble(mh_same_vs_diff_df)
mh_same_vs_diff_df <- bind_rows(mh_same_vs_diff_df, df_)
mh_same_vs_diff_df$type <- factor(mh_same_vs_diff_df$type, levels=c("Different Patient",
"Same Patient Different Metastasis",
"Same Patient Same Metastasis"))
my_comparisons <- list( c('Same Patient Same Metastasis', 'Different Patient'),
c('Same Patient Different Metastasis', 'Same Patient Same Metastasis'),
c('Same Patient Different Metastasis', 'Different Patient')
)
p <- ggboxplot(mh_same_vs_diff_df, x='type', y='mh_val',
fill='type', palette=c("#00AFBB", "#E7B800", "#FC4E07"),#'jam',
add='jitter') + theme(legend.position='none')# +
# ylim(c(0, 1.2)))
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test', label.y=c(1,1.15, .85)) +
scale_y_continuous(name="MH-Index", limits=c(0, 1.2), breaks=seq(0, 1, .25)) + xlab("") +
theme(axis.text.x = element_text(size=9,
vjust = grid::unit(c(-2, 0, -2), "points")))
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/morisita_horn.pdf',
plot=p,
width=5, height=4)
## Warning: Removed 120 rows containing missing values (geom_point).
p
## Warning: Removed 116 rows containing missing values (geom_point).
print(paste("Mean Same Patient Same Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Same Metastasis') %>% pull(mh_val) %>% mean(),2)))
## [1] "Mean Same Patient Same Metastasis: 0.93"
print(paste("Min Same Patient Same Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Same Metastasis') %>% pull(mh_val) %>% min(),2)))
## [1] "Min Same Patient Same Metastasis: 0.86"
print(paste("Max Same Patient Same Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Same Metastasis') %>% pull(mh_val) %>% max(),2)))
## [1] "Max Same Patient Same Metastasis: 0.97"
print(paste("Mean Same Patient Different Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Different Metastasis') %>% pull(mh_val) %>% mean(),2)))
## [1] "Mean Same Patient Different Metastasis: 0.45"
print(paste("Mean Min Patient Different Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Different Metastasis') %>% pull(mh_val) %>% min(),2)))
## [1] "Mean Min Patient Different Metastasis: 0.09"
print(paste("Mean Max Patient Different Metastasis: ", round(mh_same_vs_diff_df %>% filter(type=='Same Patient Different Metastasis') %>% pull(mh_val) %>% max(),2)))
## [1] "Mean Max Patient Different Metastasis: 0.97"
print(paste("Mean Different Patient: ", round(mh_same_vs_diff_df %>% filter(type=='Different Patient') %>% pull(mh_val) %>% mean(),10)))
## [1] "Mean Different Patient: 0.000700742"
print(paste("Min Different Patient: ", round(mh_same_vs_diff_df %>% filter(type=='Different Patient') %>% pull(mh_val) %>% min(),5)))
## [1] "Min Different Patient: 0"
print(paste("Max Different Patient: ", round(mh_same_vs_diff_df %>% filter(type=='Different Patient') %>% pull(mh_val) %>% max(),5)))
## [1] "Max Different Patient: 0.06873"
graph_param_random <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/graph_params_random_public.tsv')
df <- as.data.frame(graph_param_random %>% dplyr::select(-one_of('COMET_ID', 'COMET_ID_nr', 'group', 'Kit_ID',
'ld', 'group_code', 'ID')) %>% drop_na())
rownames(df) <- df$sample
df$sample <- NULL
df <- mutate_all(df, function(x) as.numeric(as.character(x)))
df['Sample'] <- rownames(df)
df <- left_join(df, metadata %>% dplyr::select(Sample, group))
df <- df %>% filter(group != 'na')
graph_param_random <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/graph_params_random_private.tsv')
df2 <- as.data.frame(graph_param_random %>% dplyr::select(-one_of('COMET_ID', 'COMET_ID_nr', 'group', 'Kit_ID',
'ld', 'group_code', 'ID')))# %>% drop_na())
rownames(df2) <- df2$sample
df2$sample <- NULL
df2 <- mutate_all(df2, function(x) as.numeric(as.character(x)))
df2['Sample'] <- rownames(df2)
df2 <- left_join(df2, metadata %>% dplyr::select(Sample, group))
df2 <- df2 %>% filter(group != 'na')
df <- df[, 1:19] %>% pivot_longer(!Sample, names_to='parameter', values_to = "value")
df['private_public'] <- 'Public'
df2 <- df2[, 1:19] %>% pivot_longer(!Sample, names_to='parameter', values_to = "value")
df2['private_public'] <- 'Private'
df <- bind_rows(df, df2)
df <- df %>% left_join(coverage, by=c("Sample"="sample_id"))
df <- df %>% filter(Coverage >= 5)
df
## # A tibble: 2,772 × 5
## Sample parameter value private_public Coverage
## <chr> <chr> <dbl> <chr> <dbl>
## 1 LivMet_1 transitivity 0.233 Public 26
## 2 LivMet_1 authority 0.00795 Public 26
## 3 LivMet_1 page_rank 0.00100 Public 26
## 4 LivMet_1 eigen_centr 0.00795 Public 26
## 5 LivMet_1 n_nodes 1000 Public 26
## 6 LivMet_1 n_edges 130 Public 26
## 7 LivMet_1 max_degree 5 Public 26
## 8 LivMet_1 largest_comp 19 Public 26
## 9 LivMet_1 max_k_core 2 Public 26
## 10 LivMet_1 diameter 8 Public 26
## # … with 2,762 more rows
comparison <- df %>% filter(parameter == 'n_edges')
my_comparisons <- list(c('Private', 'Public'))
p <- ggboxplot(comparison, x='private_public', y='value',
color='private_public', palette='jama',
add='jitter') + theme(legend.position='none')
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test') +
ylab("Number of Connections (Edges)") + xlab("")
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/connectivity_public_vs_private.pdf',
plot=p,
width=4, height=5)
p
print(paste("Nr Public:", df %>% filter(parameter=='n_edges') %>% filter(private_public == "Public") %>% pull(Sample) %>% length()))
## [1] "Nr Public: 77"
print(paste("Nr Private:", df %>% filter(parameter=='n_edges') %>% filter(private_public == "Public") %>% pull(Sample) %>% length()))
## [1] "Nr Private: 77"
top_10perc_mcpas <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/top_public_connected_rear/top_10perc_mcpas_hits.tsv')
## New names:
## * `` -> ...1
## Rows: 92 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): ...1
## dbl (2): nr_private_hits, nr_public_hits
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
top_10perc_mcpas <- top_10perc_mcpas %>% rename("sample_name" = "...1")
top_10perc_mcpas <- top_10perc_mcpas %>%
pivot_longer(-sample_name, names_to='group', values_to='Fraction McPAS hits') %>%
mutate(group = case_when(
group == 'nr_private_hits' ~ 'Top 10% most\nexpanded clones',
group == 'nr_public_hits' ~ 'Top 10% clones by\nconnectivity and\nsharing level'
)) %>%
mutate(group = factor(group, levels=c('Top 10% clones by\nconnectivity and\nsharing level',
'Top 10% most\nexpanded clones'))) %>%
mutate(`Fraction McPAS hits`=`Fraction McPAS hits`*100)
comparison <- top_10perc_mcpas
my_comparisons <- list(c('Top 10% most\nexpanded clones', 'Top 10% clones by\nconnectivity and\nsharing level'))
p <- ggboxplot(comparison, x='group', y='Fraction McPAS hits',
color='group', palette="nejm",
add='jitter') + theme(legend.position='none')
p <- p + stat_compare_means(comparisons = my_comparisons, method='t.test', paired=TRUE) +
ylab("Fraction CDR3 amino acid hits to McPAS database") + xlab("")
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/mcpas_hits_most_clonal_vs_most_connected_and_public.pdf',
plot=p,
width=4, height=5)
p
public_fraction <- comparison %>% filter(group == "Top 10% clones by\nconnectivity and\nsharing level")
print(paste("Mean Top 10% clones by connectivity and sharing level connectivity:", public_fraction %>% pull(`Fraction McPAS hits`) %>% mean())) #%>% round(0) / 10))
## [1] "Mean Top 10% clones by connectivity and sharing level connectivity: 10.0293821311376"
print(paste("Lower CI Top 10% clones by connectivity and sharing level connectivity:", public_fraction %>% pull(`Fraction McPAS hits`) %>% lower())) #%>% round(0) / 10))
## [1] "Lower CI Top 10% clones by connectivity and sharing level connectivity: 9.68561598462979"
print(paste("Upper CI Top 10% clones by connectivity and sharing level connectivity:", public_fraction %>% pull(`Fraction McPAS hits`) %>% upper())) #%>% round(0) / 10))
## [1] "Upper CI Top 10% clones by connectivity and sharing level connectivity: 10.3731482776455"
private_fraction <- comparison %>% filter(group == "Top 10% most\nexpanded clones")
print(paste("Mean Top 10% most expanded clones connectivity:", private_fraction %>% pull(`Fraction McPAS hits`) %>% mean())) #%>% round(0) / 10))
## [1] "Mean Top 10% most expanded clones connectivity: 2.07607803089866"
print(paste("Lower CI Top 10% most expanded clones connectivity:", private_fraction %>% pull(`Fraction McPAS hits`) %>% lower())) #%>% round(0) / 10))
## [1] "Lower CI Top 10% most expanded clones connectivity: 1.91647388830312"
print(paste("Upper CI Top 10% most expanded clones connectivity:", private_fraction %>% pull(`Fraction McPAS hits`) %>% upper())) #%>% round(0) / 10))
## [1] "Upper CI Top 10% most expanded clones connectivity: 2.2356821734942"
meta <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/immunarch_data/metadata.txt')
meta
## # A tibble: 93 × 7
## Sample ID COMET_ID COMET_ID_nr group group_code Kit_ID
## <chr> <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 LivMet_1 1 COMET-0026M1 26 no_chemo 0 376276
## 2 LivMet_2 2 COMET-0035M1_1 35 no_chemo 0 376276
## 3 LivMet_3 3 COMET-0059M1_1 59 no_chemo 0 376276
## 4 LivMet_4 4 COMET-0078M1 78 no_chemo 0 376276
## 5 LivMet_5 5 COMET-0011M1_1 11 no_chemo 0 376276
## 6 LivMet_6 6 COMET-0010M1 10 no_chemo 0 376276
## 7 LivMet_7 7 COMET-0015M1 15 no_chemo 0 376276
## 8 LivMet_8 8 COMET-0017M1 17 no_chemo 0 376276
## 9 LivMet_9 9 COMET-0018M2 18 short_chemo 1 376276
## 10 LivMet_10 10 COMET-0024M1 24 no_chemo 0 376276
## # … with 83 more rows
df2 <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/imnet_output/powerlaw.tsv')
same_patient <- c(
'LivMet_38', 'LivMet_11', 'LivMet_14', 'LivMet_17',
'LivMet_40', 'LivMet_42', 'LivMet_33'
)
df2 <- df2 %>% filter(!Sample %in% same_patient) %>% left_join(coverage, by=c("Sample" = "sample_id")) %>% filter(Coverage >= 5) %>%
filter(Sample != "LivMet_21")
p <- df2 %>%
filter(group != 'na') %>%
mutate(group = fct_relevel(group,
'no_chemo', 'short_chemo', 'long_chemo')) %>%
mutate(group = case_when(group == "no_chemo" ~ "No-NACT",
group == "short_chemo" ~ "Short-interval",
group == "long_chemo" ~ "Long-interval")) %>%
mutate(group = fct_relevel(group, "No-NACT", "Short-interval", "Long-interval")) %>%
ggplot(aes(x=group, y=p_val, fill=group), color='black') +
geom_quasirandom(pch=21, size=3) +
geom_hline(yintercept=.1, linetype='dashed', color='black') +
scale_y_continuous(breaks=seq(0,1,.1), limits = c(0,1)) +
theme_minimal() +
theme(legend.position="none") +
# scale_fill_manual(values = wes_palette(n=3, name="GrandBudapest1")) +
ylab("P-value") + xlab("")
p <- set_palette(p, "jco")
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/powerlaw.pdf',
plot=p,
width=5, height=5)
p
Datasets that passed the power law goodness of fit test
df2 %>% left_join(hill_auc) %>% filter(p_val > 0.1)
## Joining, by = c("Sample", "ID", "COMET_ID", "COMET_ID_nr", "group", "Kit_ID", "Coverage")
## # A tibble: 9 × 20
## Sample p_val ID COMET_ID COMET_ID_nr group group_code Kit_ID Coverage
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 LivMet_2 0.143 2 COMET-003… 35 no_ch… 0 3.76e5 31.6
## 2 LivMet_23 0.154 23 COMET-004… 43 short… 1 3.76e5 24.8
## 3 LivMet_28 0.126 28 COMET-005… 53 no_ch… 0 3.76e5 20.4
## 4 LivMet_3 0.129 3 COMET-005… 59 no_ch… 0 3.76e5 29.9
## 5 LivMet_4 0.172 4 COMET-007… 78 no_ch… 0 3.76e5 6.7
## 6 LivMet_47 0.2 47 COMET-008… 82 no_ch… 0 1.25e7 11.1
## 7 LivMet_64 0.432 64 COMET-014… 140 short… 1 1.25e7 25.6
## 8 LivMet_69 0.134 69 COMET-018… 186 short… 1 1.25e7 20.2
## 9 LivMet_79 0.133 79 COMET-026… 266 short… 1 1.25e7 40.3
## # … with 11 more variables: hill_auc <dbl>, Kit UID <dbl>, Run ID <chr>,
## # Well Location <chr>, Date Posted <chr>, Gene Rearrangements <dbl>,
## # % On-target <dbl>, QC Status <chr>, NACT-group <fct>, AUC_group <int>,
## # Clonality <dbl>
df2 %>% left_join(hill_auc) %>% filter(p_val > 0.1) %>% pull(Clonality) %>% mean() %>% round(1)
## [1] 4.4
df2 %>% left_join(hill_auc) %>% filter(p_val >= 0.0) %>% drop_na() %>% pull(Clonality) %>% mean() %>% round(1)
## [1] 5.5
global_param_subset <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/global_param_subset.tsv')
global_param_subset['Sample'] <- global_param_subset['...1']
global_param_subset['...1'] <- NULL
global_param_subset
## # A tibble: 77 × 11
## nr_nodes number_edges largest_component maximal_k_core diameter assortativity
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8611 947 109 6 21 0.389
## 2 6852 727 66 4 16 0.361
## 3 34849 13527 4371 10 43 0.458
## 4 17894 4462 1026 9 19 0.507
## 5 9218 1144 168 6 25 0.435
## 6 10109 1307 171 4 20 0.444
## 7 10019 1492 357 5 20 0.395
## 8 18256 4915 1180 11 24 0.517
## 9 2351 109 8 4 4 0.663
## 10 4818 447 38 4 16 0.434
## # … with 67 more rows, and 5 more variables: average_cluster <dbl>,
## # average_degree <dbl>, clustering_coeff <dbl>, density <dbl>, Sample <chr>
global_param_subset <- global_param_subset %>% left_join(metadata) %>% left_join(coverage, by=c("Sample"="sample_id"))
global_param_subset <- global_param_subset %>% filter(Coverage > 5)
global_param_subset['connectivity_fraction'] <- global_param_subset$number_edges / global_param_subset$nr_nodes
global_param_subset
## # A tibble: 76 × 18
## nr_nodes number_edges largest_component maximal_k_core diameter assortativity
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8611 947 109 6 21 0.389
## 2 6852 727 66 4 16 0.361
## 3 34849 13527 4371 10 43 0.458
## 4 17894 4462 1026 9 19 0.507
## 5 9218 1144 168 6 25 0.435
## 6 10109 1307 171 4 20 0.444
## 7 10019 1492 357 5 20 0.395
## 8 18256 4915 1180 11 24 0.517
## 9 2351 109 8 4 4 0.663
## 10 4818 447 38 4 16 0.434
## # … with 66 more rows, and 12 more variables: average_cluster <dbl>,
## # average_degree <dbl>, clustering_coeff <dbl>, density <dbl>, Sample <chr>,
## # ID <dbl>, COMET_ID <chr>, COMET_ID_nr <dbl>, group <chr>, Kit_ID <dbl>,
## # Coverage <dbl>, connectivity_fraction <dbl>
Mean nr nodes
print(paste("Mean number of nodes:", round(mean(global_param_subset$nr_nodes), 0)))
## [1] "Mean number of nodes: 16056"
print(paste("Lower CI of nodes: ", round(lower(global_param_subset$nr_nodes), 0)))
## [1] "Lower CI of nodes: 13637"
print(paste("Upper CI of nodes: ", round(upper(global_param_subset$nr_nodes), 0)))
## [1] "Upper CI of nodes: 18475"
Mean nr edges
print(paste("Mean number of edges:", round(mean(global_param_subset$number_edges), 0)))
## [1] "Mean number of edges: 4133"
print(paste("Lower CI of edges: ", round(lower(global_param_subset$number_edges), 0)))
## [1] "Lower CI of edges: 3015"
print(paste("Upper CI of edges: ", round(upper(global_param_subset$number_edges), 0)))
## [1] "Upper CI of edges: 5251"
Mean connectivity fraction
print(paste("Mean connectivity:", round(mean(global_param_subset$connectivity_fraction), 4)))
## [1] "Mean connectivity: 0.1897"
print(paste("Lower CI of connectivity: ", round(lower(global_param_subset$connectivity_fraction), 4)))
## [1] "Lower CI of connectivity: 0.1647"
print(paste("Upper CI of connectivity: ", round(upper(global_param_subset$connectivity_fraction), 4)))
## [1] "Upper CI of connectivity: 0.2146"
degree_share = read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/degree_share.tsv')
degree_share <- degree_share %>% arrange(share_level)
degree_share$share_level[degree_share$share_level == 0] <- 1
degree_share
## # A tibble: 1,671,005 × 2
## degree share_level
## <dbl> <dbl>
## 1 0 1
## 2 0 1
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
## 7 0 1
## 8 0 1
## 9 0 1
## 10 0 1
## # … with 1,670,995 more rows
p <- degree_share %>%
ggplot(aes(share_level, degree)) +
stat_summary(fun = mean, geom = 'bar', fill='darkgrey') +
stat_summary(fun.data = "mean_cl_normal",
geom = 'errorbar',
width = .4) +
theme_classic() +
xlab("Number of patients") + ylab("Clonal connections (Degrees)") +
scale_x_continuous(breaks=c(1, 10, 20, 30, 40, 50))
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/degree_to_sharing.pdf',
plot=p,
width=6, height=4)
p
connectivity_to_nodes <- read_tsv("/Users/eirikhoy/Dropbox/projects/airr_comet/data/connectivity_fraction_to_nr_nodes.tsv")[, 2:3]
attach(connectivity_to_nodes)
lm.fit <- lm(fraction ~ nr_nodes, data=connectivity_to_nodes)
summary(lm.fit)
##
## Call:
## lm(formula = fraction ~ nr_nodes, data = connectivity_to_nodes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.053419 -0.019535 -0.005885 0.009346 0.179879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.862e-02 6.203e-03 6.227 1.87e-08 ***
## nr_nodes 9.300e-06 2.767e-07 33.608 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03319 on 83 degrees of freedom
## Multiple R-squared: 0.9315, Adjusted R-squared: 0.9307
## F-statistic: 1129 on 1 and 83 DF, p-value: < 2.2e-16
confint(lm.fit, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 2.628540e-02 5.095917e-02
## nr_nodes 8.749917e-06 9.850738e-06
p <- connectivity_to_nodes %>%
ggplot(aes(x=nr_nodes, y=fraction)) +
geom_point() +
theme_minimal() +
ylab("Fraction connections to clones") + xlab("Number of clones (nodes)") +
geom_smooth(method = 'lm', se=TRUE, level=.95, color='red')
ggsave(filename='/Users/eirikhoy/Dropbox/Manuscript_ImmunoSEQ_COMET/figures/connectivity_to_nodes.pdf',
plot=p,
width=6, height=5)
## `geom_smooth()` using formula 'y ~ x'
p
## `geom_smooth()` using formula 'y ~ x'
library(tidyverse)
survivial_data <- read_delim("/Users/eirikhoy/Dropbox/projects/airr_comet/data/metadata/survival_data.csv", delim=';')
## New names:
## * n_status -> n_status...13
## * n_status -> n_status...17
## Rows: 85 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (10): Sample, COMET_ID, pathology_id, date_surgery, pcrc_loc, t_status, ...
## dbl (10): age, sex, ASA, bmi, ECOG, tstage_updated, n_status...17, status, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
evenness_AUC <- read_tsv('/Users/eirikhoy/Dropbox/projects/airr_comet/data/evenness_auc.tsv')
## Rows: 92 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): COMET_ID, Sample, group, Run ID, Well Location, Date Posted, QC Sta...
## dbl (9): Evenenss_AUC, ID, COMET_ID_nr, Kit_ID, Kit UID, Gene Rearrangements...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
evenness_AUC <- evenness_AUC %>% dplyr::select(Sample, Evenenss_AUC, AUC_group)
evenness_AUC <- evenness_AUC %>% mutate(Clonality = case_when(AUC_group == 1 ~ 'High',
AUC_group == 2 ~ "Mid",
AUC_group == 3 ~ "Low"))
survivial_data <- survivial_data %>% left_join(evenness_AUC)
## Joining, by = "Sample"
survivial_data$Clonality <- as.factor(survivial_data$Clonality)
attach(survivial_data)
table(status)
## status
## 0 1
## 44 41
table(Clonality)
## Clonality
## High Low Mid
## 26 16 43
library(survival)
fit.surv <- survfit(Surv(survival, status) ~ 1)
plot(fit.surv, xlab = "Months",
ylab = "Estimated Probability of Survival")
Clonality <- as.factor(Clonality)
fit.clonal <- survfit(Surv(survival, status) ~ Clonality)
plot(fit.clonal, xlab = "Months",
ylab = "Estimated Probability of Survival", col = c(2,3,4))
legend("bottomleft", levels(Clonality), col = c(2,3, 4), lty = 1)
logrank.test <- survdiff(Surv(survival, status) ~ Clonality)
logrank.test
## Call:
## survdiff(formula = Surv(survival, status) ~ Clonality)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Clonality=High 26 11 12.0 0.07939 0.1128
## Clonality=Low 16 10 9.7 0.00906 0.0121
## Clonality=Mid 43 20 19.3 0.02383 0.0453
##
## Chisq= 0.1 on 2 degrees of freedom, p= 0.9
fit.cox <- coxph(Surv(survival, status) ~ Clonality)
summary(fit.cox)
## Call:
## coxph(formula = Surv(survival, status) ~ Clonality)
##
## n= 85, number of events= 41
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ClonalityLow 0.1163 1.1233 0.4405 0.264 0.792
## ClonalityMid 0.1198 1.1273 0.3758 0.319 0.750
##
## exp(coef) exp(-coef) lower .95 upper .95
## ClonalityLow 1.123 0.8902 0.4737 2.664
## ClonalityMid 1.127 0.8871 0.5397 2.354
##
## Concordance= 0.528 (se = 0.045 )
## Likelihood ratio test= 0.11 on 2 df, p=0.9
## Wald test = 0.11 on 2 df, p=0.9
## Score (logrank) test = 0.11 on 2 df, p=0.9
summary(fit.cox)$logtest[1]
## test
## 0.1147714
summary(fit.cox)$waldtest[1]
## test
## 0.11
summary(fit.cox)$sctest[1]
## test
## 0.1128919
subset <- (Clonality != "Mid")
Clonality <- as.factor(Clonality[])
fit.clonal <- survfit(Surv(survival, status) ~ Clonality,
subset = subset)
plot(fit.clonal, xlab = "Months",
ylab = "Estimated Probability of Survival", col = c(2,4))
legend("bottomleft", levels(factor(c("High", "Low"), levels=c("High", "Low"))), col = c(2,4), lty = 1)
subset <- (Clonality != "Mid")
logrank.test <- survdiff(Surv(survival, status) ~ Clonality,
subset=subset)
logrank.test
## Call:
## survdiff(formula = Surv(survival, status) ~ Clonality, subset = subset)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Clonality=High 26 11 11.14 0.00171 0.00378
## Clonality=Low 16 10 9.86 0.00193 0.00378
##
## Chisq= 0 on 1 degrees of freedom, p= 1
subset <- (Clonality != "Mid")
fit.cox <- coxph(Surv(survival, status) ~ Clonality,
subset=subset)
summary(fit.cox)
## Call:
## coxph(formula = Surv(survival, status) ~ Clonality, subset = subset)
##
## n= 42, number of events= 21
##
## coef exp(coef) se(coef) z Pr(>|z|)
## ClonalityLow 0.02739 1.02777 0.44531 0.062 0.951
## ClonalityMid NA NA 0.00000 NA NA
##
## exp(coef) exp(-coef) lower .95 upper .95
## ClonalityLow 1.028 0.973 0.4294 2.46
## ClonalityMid NA NA NA NA
##
## Concordance= 0.478 (se = 0.057 )
## Likelihood ratio test= 0 on 1 df, p=1
## Wald test = 0 on 1 df, p=1
## Score (logrank) test = 0 on 1 df, p=1
summary(fit.cox)$logtest[1]
## test
## 0.003782242
summary(fit.cox)$waldtest[1]
## test
## 0
summary(fit.cox)$sctest[1]
## test
## 0.003784237